Notes & questions

From here:


Geweke (1992) convergence diagnostic “is a time-series approach that compares the mean and variance of segments from the beginning and end of a single chain.”

\[z = \frac{\theta_a-\theta_b}{\sqrt{Var(\theta_a)+Var(\theta_b)}} \]

“where a is the early interval and b the late interval. If the z-scores (theoretically distributed as standard normal variates) of these two segments are similar, it can provide evidence for convergence… If the chain has converged, the majority of points should fall within 2 standard deviations of zero… The occurrence of the scores well within 2 standard deviations of zero does not indicate lack of convergence, while deviations exceeding 2 standard deviations suggest that additional samples are required to achieve convergence”



find data/derived_data/gibbs_varcomp -type f -name 'last_solutions' | sed -r 's|/[^/]+$||' |sort |uniq

nohup echo -e 'renf90.par \n 818000 0 \n 20' | /usr/local/bin/thrgibbs1f90 > gibbs.iter5.3v3alt.continue.out & exit

Sampling parameters:

  • 1,000,000 total samples
  • Burn-in of 50,000 samples removed in postgibbs
  • Kept every 20th sample during sampling, thinned to every 100th sample in postgibbs

Setup

gibbs_samples <-
  purrr::map2_dfr(.x = rep(c(1:10), 
                           times = 7),
                  .y = rep(c("1", "2", "3alt", "5", "7", "8", "9"),
                           times = 10),
                  ~ read_gibbs_samples(iteration = .x,
                                 region = .y)) %>% 
  select(iter, dataset, everything(), -X1, -X3) %>% 
  purrr::set_names("iter", "dataset", "round", c(1:14)) %>% 
  tidyr::pivot_longer(cols = c(`1`:`14`),
                      names_to = "param") %>% 
  mutate(param = case_when(param == "1" ~ "dir1dir1",
                           param == "2" ~ "dir1dir2",
                           param == "3" ~ "dir1mat1",
                           param == "4" ~ "dir1mat2",
                           param == "5" ~ "dir2dir2",
                           param == "6" ~ "dir2mat1",
                           param == "7" ~ "dir2mat2",
                           param == "8" ~ "mat1mat1",
                           param == "9" ~ "mat1mat2",
                           param == "10" ~ "mat2mat2",
                           param == "11" ~ "mpe1mpe1",
                           param == "12" ~ "mpe2mpe2",
                           param == "13" ~ "res1res1",
                           param == "14" ~ "res2res2"),
         iter = as.character(iter))
gibbs_mce <-
  purrr::map2_dfr(.x = rep(c(1:10), 
                           times = 7),
                  .y = rep(c("1", "2", "3alt", "5", "7", "8", "9"),
                           times = 10),
                  ~ read_gibbs_mce(iteration = .x,
                                   region = .y)) %>%
  left_join(gibbs_samples %>% 
              mutate(iter = as.integer(iter)) %>% 
              group_by(iter, dataset, param) %>% 
              tally(name = "n_samples")) %>% 
  select(iter, dataset, param, n_samples, everything()) 
gibbs_psd <-
  purrr::map2_dfr(.x = rep(c(1:10), 
                           times = 7),
                  .y = rep(c("1", "2", "3alt", "5", "7", "8", "9"),
                           times = 10),
                  ~ read_gibbs_psd(iteration = .x,
                                 region = .y)) %>%
  left_join(gibbs_samples %>% 
              mutate(iter = as.integer(iter)) %>% 
              group_by(iter, dataset, param) %>% 
              tally(name = "n_samples")) %>% 
  select(iter, dataset, param, n_samples, everything()) 

Sanity check

gibbs_samples %>% 
  group_by(iter, dataset, param) %>% 
  tally(sort = TRUE) %>% 
  ungroup() %>% 
  distinct(iter, dataset, n)
gibbs_samples %>% 
  distinct(iter, dataset) %>% 
  arrange(dataset, iter)

Monte Carlo error by time series

Parameters with effective sample size lower than 10

gibbs_mce %>% 
  filter(10 > effective_sample_size) %>% 
  DT::datatable(rownames = FALSE)

Posterior standard deviation

Parameters with fewer than 10 independent batches

gibbs_psd %>% 
  filter(10 > independent_batches) %>% 
  DT::datatable(rownames = FALSE)

Plot post-burnin and post-thinning samples

plot_gibbs_iter <-
  function(df) {
    df %>% 
      filter(!stringr::str_detect(param, "res|mpe")) %>% 
      ggplot(aes(x = round,
                 y = value,
                 color = param)) +
      geom_line() +
      ggsci::scale_color_ucscgb() +
      theme_classic() +
      facet_wrap(~ dataset, nrow = 3)
  }
plot_gibbs_region <- 
  function(df) {
    df %>% 
      filter(!stringr::str_detect(param, "res|mpe")) %>% 
      mutate(iter = glue("Iteration {iter}")) %>% 
      ggplot(aes(x = round, 
                 y = value,
                 color = param)) +
      geom_line() +
      theme_classic() +
      ggsci::scale_color_ucscgb() +
      labs(x = "Round", 
           y = "Value",
           color = "Parameter") +
      facet_wrap(~ iter, ncol = 2)
  }

Desert

gibbs_samples %>% 
  filter(dataset == "3v1") %>% 
  plot_gibbs_region()

Southeast

gibbs_samples %>% 
  filter(dataset == "3v2") %>% 
  plot_gibbs_region()

High Plains, control

gibbs_samples %>% 
  filter(dataset == "3v3alt") %>% 
  plot_gibbs_region()

Arid Prairie

gibbs_samples %>% 
  filter(dataset == "3v5") %>% 
  plot_gibbs_region()

Forested Mountains

gibbs_samples %>% 
  filter(dataset == "3v7") %>% 
  plot_gibbs_region()

Fescue Belt

gibbs_samples %>% 
  filter(dataset == "3v8") %>% 
  plot_gibbs_region()

Upper Midwest & Northeast

gibbs_samples %>% 
  filter(dataset == "3v9") %>% 
  plot_gibbs_region()